In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import os
import sys

# Import only the plotting names this notebook actually uses rather than a
# wildcard, so it is obvious where each name comes from and nothing else is
# pulled into the notebook namespace.
from ggplot import aes, facet_wrap, geom_rect, ggplot

# Put the parent of the current working directory on sys.path (once) so the
# local pyrnafold package can be imported below.
pyrnafold_path = os.path.abspath('..')
if pyrnafold_path not in sys.path:
    sys.path.append(pyrnafold_path)

from pyrnafold.pyrnafold import trange_df, sig_positions

In [3]:
# List the input files in ../data (relies on IPython's `ls` alias / automagic).
ls -lah ../data


total 1.6M
drwxrwxr-x 2 ilya ilya 4.0K Apr 18 13:26 ./
drwxrwxr-x 8 ilya ilya 4.0K Apr 18 12:55 ../
-rw-rw-r-- 1 ilya ilya 112K Apr 18 13:26 hHSR_35.txt
-rw-rw-r-- 1 ilya ilya 115K Apr 18 13:26 hHSR_36.txt
-rw-rw-r-- 1 ilya ilya 118K Apr 18 13:26 hHSR_37.txt
-rw-rw-r-- 1 ilya ilya 121K Apr 18 13:26 hHSR_38.txt
-rw-rw-r-- 1 ilya ilya 123K Apr 18 13:26 hHSR_39.txt
-rw-rw-r-- 1 ilya ilya 127K Apr 18 13:26 hHSR_40.txt
-rw-rw-r-- 1 ilya ilya 132K Apr 18 13:26 hHSR_41.txt
-rw-rw-r-- 1 ilya ilya 136K Apr 18 13:26 hHSR_42.txt
-rw-rw-r-- 1 ilya ilya 140K Apr 18 13:26 hHSR_43.txt
-rw-rw-r-- 1 ilya ilya 145K Apr 18 13:26 hHSR_44.txt
-rw-rw-r-- 1 ilya ilya 150K Apr 18 13:26 hHSR_45.txt
-rw-rw-r-- 1 ilya ilya  611 Apr 18 12:45 hHSR.fa
-rw-rw-r-- 1 ilya ilya 7.1K Apr 18 13:13 ROSE1_37.txt
-rw-rw-r-- 1 ilya ilya 7.8K Apr 18 13:13 ROSE1_38.txt
-rw-rw-r-- 1 ilya ilya 8.1K Apr 18 13:13 ROSE1_39.txt
-rw-rw-r-- 1 ilya ilya 8.2K Apr 18 13:13 ROSE1_40.txt
-rw-rw-r-- 1 ilya ilya 8.6K Apr 18 13:13 ROSE1_41.txt
-rw-rw-r-- 1 ilya ilya 8.8K Apr 18 13:13 ROSE1_42.txt
-rw-rw-r-- 1 ilya ilya 9.3K Apr 18 13:13 ROSE1_43.txt
-rw-rw-r-- 1 ilya ilya  19K Apr 18 12:58 ROSE1_dp.ps
-rw-rw-r-- 1 ilya ilya  126 Apr 18 12:46 rose.fa

Build a dataframe

`trange_df` parses the `.txt` files containing base-pairing probabilities for each temperature in `trange` and builds a DataFrame that contains, for each temperature T, the probability difference relative to the lowest T in `trange`.


In [4]:
# Temperatures to load. range(35, 45) stops at 44, so hHSR_45.txt in ../data
# is not part of this range -- NOTE(review): confirm that omitting 45 is intended.
temperatures = range(35, 45)
# Presumably the common prefix of the per-temperature files
# (hHSR_35.txt, hHSR_36.txt, ...) -- verify against trange_df.
hhsr_prefix = '../data/hHSR'
df = trange_df(hhsr_prefix, trange=temperatures)
df


Out[4]:
pos Diff Temp
0 0 0.000000 36
1 1 0.002137 36
2 2 0.002143 36
3 3 0.001084 36
4 4 0.001551 36
5 5 0.002297 36
6 6 0.002091 36
7 7 0.001138 36
8 8 0.002524 36
9 9 0.004284 36
10 10 0.005770 36
11 11 0.006140 36
12 12 0.006634 36
13 13 0.001101 36
14 14 0.001096 36
15 15 0.006333 36
16 16 0.006235 36
17 17 0.006208 36
18 18 0.009629 36
19 19 0.011970 36
20 20 0.002286 36
21 21 0.000415 36
22 22 0.000374 36
23 23 0.000376 36
24 24 0.000389 36
25 25 0.001158 36
26 26 0.004279 36
27 27 0.003266 36
28 28 0.006666 36
29 29 0.004278 36
... ... ... ...
575 575 0.136961 44
576 576 0.008605 44
577 577 0.017564 44
578 578 0.020003 44
579 579 0.020030 44
580 580 0.019213 44
581 581 0.018791 44
582 582 0.018228 44
583 583 0.071693 44
584 584 0.001168 44
585 585 0.011023 44
586 586 0.015023 44
587 587 0.007591 44
588 588 0.006163 44
589 589 0.006088 44
590 590 0.002258 44
591 591 0.001765 44
592 592 0.060197 44
593 593 0.019381 44
594 594 0.019577 44
595 595 0.020931 44
596 596 0.005520 44
597 597 0.023830 44
598 598 0.020185 44
599 599 0.016449 44
600 600 0.012782 44
601 601 0.000806 44
602 602 0.001157 44
603 603 0.007710 44
604 604 0.013289 44

5445 rows × 3 columns


In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) for every column of df.
df.describe()


Out[5]:
pos Diff Temp
count 5445.000000 5445.000000 5445.000000
mean 302.000000 0.030726 40.000000
std 174.664258 0.042001 2.582226
min 0.000000 0.000000 36.000000
25% 151.000000 0.003713 38.000000
50% 302.000000 0.013258 40.000000
75% 453.000000 0.038685 42.000000
max 604.000000 0.254724 44.000000

In [6]:
# Each row becomes a rectangle spanning pos-1..pos on x and 0..Diff on y,
# i.e. a unit-wide bar whose height is the probability difference.
bar_mapping = aes(xmin='pos-1', xmax='pos', ymin=0, ymax='Diff')

# One panel per value of the Temp column.
g = (
    ggplot(df, bar_mapping)
    + geom_rect()
    + facet_wrap('Temp')
)

# Printing the plot object emits its repr (the "<ggplot: (...)>" line below);
# presumably this is also what triggers rendering of the figure -- confirm.
print(g)


<ggplot: (-9223363292714695957)>

In [10]:
# sig_positions' result is used directly as a row indexer into df; presumably
# it marks rows whose Diff lies more than num_sigma standard deviations above
# the mean -- verify against pyrnafold.
significant_rows = sig_positions(df, num_sigma=3)
df[significant_rows]


Out[10]:
pos Diff Temp
474 474 0.157715 40
475 475 0.157546 40
476 476 0.157095 40
474 474 0.170586 41
475 475 0.184866 41
476 476 0.184355 41
477 477 0.183233 41
297 297 0.157668 42
302 302 0.172771 42
337 337 0.172771 42
341 341 0.157668 42
474 474 0.162031 42
475 475 0.210209 42
476 476 0.209649 42
477 477 0.208365 42
483 483 0.171425 42
487 487 0.165673 42
488 488 0.163947 42
489 489 0.162715 42
490 490 0.162722 42
491 491 0.163510 42
505 505 0.163510 42
506 506 0.162722 42
507 507 0.162715 42
508 508 0.163947 42
509 509 0.165673 42
511 511 0.163513 42
512 512 0.172585 42
513 513 0.170463 42
514 514 0.169901 42
... ... ... ...
480 480 0.158090 44
483 483 0.207394 44
487 487 0.208092 44
488 488 0.205906 44
489 489 0.204361 44
490 490 0.204371 44
491 491 0.205139 44
505 505 0.205139 44
506 506 0.204371 44
507 507 0.204361 44
508 508 0.205906 44
509 509 0.208092 44
511 511 0.193020 44
512 512 0.203825 44
513 513 0.201002 44
514 514 0.200197 44
515 515 0.199886 44
516 516 0.200614 44
517 517 0.161292 44
518 518 0.201256 44
519 519 0.195252 44
523 523 0.174697 44
543 543 0.195252 44
544 544 0.201256 44
546 546 0.200614 44
547 547 0.199886 44
548 548 0.200197 44
549 549 0.201002 44
550 550 0.203825 44
551 551 0.167955 44

144 rows × 3 columns


In [ ]: